home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Personal Computer World 2005 October
/
PCWOCT05.iso
/
Software
/
FromTheMag
/
Ant Movie Catalog 3.5.0.2
/
amc_install.exe
/
{app}
/
Scripts
/
StringUtils7552.pas
< prev
next >
Wrap
Pascal/Delphi Source File
|
2005-02-11
|
18KB
|
454 lines
unit StringUtils7552;
{
See comments in StringUtils1.pas for general infos
This file was created by scorpion7552
Of course, you can use these functions in your scripts :
simply add "StringUtils7552" in the uses clause of your script.
note that I also use StringUtils1 here, so you get the 2 for the same price...
BUT remember that I have created this file for my own needs and that these
functions/procedures may change one day or another without notification,
... or even disappear
}
uses
StringUtils1;
const
crlf = #13#10; // carriage return/line feed
// I use the 2 next characters to separate fields in a string
// (for parsing without confusion with real characters)
sepchar1 = #02; // internal separator 1
sepchar2 = #03; // internal separator 2
// special characters
spec1 = '" ! ? , : ; / ( )';
spec2 = ''' . -';
var
FormatUTF8: Integer; // flag: see FormatText
memoAdr, memoTxt: TStringList; // memo lists: see SelectMovie
// general purpose
//------------------------------------------------------------------------------
// sort a StringList (ascending)
//------------------------------------------------------------------------------
procedure SortList(stringl: TStringList);
var
i1, i2, imin: Integer;
min, min2: String;
begin
for i1 := 0 to stringl.count -2 do
begin
min := stringl.GetString(i1); // current = min
imin := i1;
// search the smallest entry in next
for i2 := i1 +1 to stringl.count-1 do
begin
min2 := stringl.GetString(i2);
if min2 < min then
begin // current (i2) = new min
min := min2; // memorize it and continue
imin := i2;
end;
end; {for i2}
if imin <> i1 then // swap current (i1) and new min (imin)
stringl.Exchange(i1, imin);
end; {for i1}
end;
//------------------------------------------------------------------------------
// returns the number of words of a string
//------------------------------------------------------------------------------
function Words(str: string) :integer;
var
vcountwords: TStringList;
begin
vcountwords := TStringList.Create;
vcountwords.Text := StringReplace(str, ' ', crlf); // parse words
result := vcountwords.Count;
vcountwords.Free;
end;
//------------------------------------------------------------------------------
// returns percentage (integer) of words of string1 found in string2
// of course, both strings must be formatted using the same routine
// (like CleanString for example)
// note that 100 means exact match
//------------------------------------------------------------------------------
function CompareWords(str1, str2: string) :integer;
var
wcount, i: integer;
strl1, strl2: TStringList;
w: string;
begin
wcount := 0; // counter
strl1 := TStringList.Create;
strl1.Text := StringReplace(str1, ' ', crlf); // parse words
strl2 := TStringList.Create;
// strange: we can use IndexOfName and GetName but not GetValue and IndexOf ???
strl2.Text := StringReplace(str2+'=', ' ', '='+crlf); // for IndexOfName
for i := 0 to strl1.Count -1 do // look for words of string1
begin
w := strl1.GetString(i); // current word of string1
if w = '' then continue;
if strl2.IndexOfName(w) <> -1 then // match
wcount := wcount +1;
end; {for i}
if strl1.Count > 0 then // don't like 'divide by zero' !!!
begin
wcount := (wcount * 100) div strl1.Count; // percentage
// if all words of string1 have been found in string2 (in any order)
// but string2 is longer than string1, we can't count that as exact match !
if (wcount = 100) and (strl2.Count > strl1.Count) then
wcount := wcount - (strl2.Count - strl1.Count);
end;
result := wcount;
end;
//------------------------------------------------------------------------------
// returns the movie name stored in amc
//------------------------------------------------------------------------------
Function GetMovieName:string;
begin
result := GetField(fieldTranslatedTitle); // first translated
if result = '' then result := GetField(fieldOriginalTitle); // or original
end;
//------------------------------------------------------------------------------
// returns a string translated to lowercase without accents
//------------------------------------------------------------------------------
function AnsiLowerCaseNoAccents(str1: string) :string;
begin
str1 := AnsiLowerCase(str1);
str1 := StringReplace(str1, 'α', 'a');
str1 := StringReplace(str1, 'ß', 'a');
str1 := StringReplace(str1, 'Γ', 'a');
str1 := StringReplace(str1, 'π', 'a');
str1 := StringReplace(str1, 'Σ', 'a');
str1 := StringReplace(str1, 'π', 'a');
str1 := StringReplace(str1, 'Θ', 'e');
str1 := StringReplace(str1, 'Φ', 'e');
str1 := StringReplace(str1, 'δ', 'e');
str1 := StringReplace(str1, 'Ω', 'e');
str1 := StringReplace(str1, '∩', 'i');
str1 := StringReplace(str1, 'ε', 'i');
str1 := StringReplace(str1, '∞', 'i');
str1 := StringReplace(str1, 'φ', 'i');
str1 := StringReplace(str1, '⌠', 'o');
str1 := StringReplace(str1, '÷', 'o');
str1 := StringReplace(str1, '⌡', 'o');
str1 := StringReplace(str1, '≥', 'o');
str1 := StringReplace(str1, '≤', 'o');
str1 := StringReplace(str1, '∙', 'u');
str1 := StringReplace(str1, 'ⁿ', 'u');
str1 := StringReplace(str1, '√', 'u');
str1 := StringReplace(str1, '·', 'u');
str1 := StringReplace(str1, 'τ', 'c');
str1 := StringReplace(str1, '±', 'n');
result := str1;
end;
//------------------------------------------------------------------------------
// returns a string with 1st article removed (first word only)
//------------------------------------------------------------------------------
function RemoveArticles(str1: string) :string;
var
Articles: array of string;
i: integer;
str2: String;
begin
SetArrayLength(Articles,32);
Articles[0]:='le ';
Articles[1]:='la ';
Articles[2]:='l''';
Articles[3]:='l ';
Articles[4]:='les ';
Articles[5]:='des ';
Articles[6]:='un ';
Articles[7]:='une ';
Articles[8]:='the ';
Articles[9]:='a ';
Articles[10]:='an ';
Articles[11]:='der ';
Articles[12]:='das ';
Articles[13]:='die ';
Articles[14]:='dem ';
Articles[15]:='den ';
Articles[16]:='ein ';
Articles[17]:='eine ';
Articles[18]:='einen ';
Articles[19]:='einer ';
Articles[20]:='eines ';
Articles[21]:='einem ';
Articles[22]:='uno ';
Articles[23]:='una ';
Articles[24]:='el ';
Articles[25]:='los ';
Articles[26]:='las ';
Articles[27]:='unos ';
Articles[28]:='unas ';
Articles[29]:='il ';
Articles[30]:='lo ';
Articles[31]:='i ';
str2 := AnsiLowerCase(str1);
for i := 0 to GetArrayLength(articles)-1 do
begin
if Pos(Articles[i], str2) = 1 then
begin
str1 := Copy(str1, Length(Articles[i])+1, length(str1));
Break;
end;
end; {for i}
result := Trim(str1);
end;
//------------------------------------------------------------------------------
// returns a string with special characters translated (& etc...)
//------------------------------------------------------------------------------
function TranslateSpecial(str1: string) :string;
begin
// sometimes, pages are coded with (yes, I have seen that...)
// Don't know why they don't code directly (mistake ?)
// so first change '&' with '&', then HTMLDecode will be happy
str1 := StringReplace(str1, '&', '&');
// translate special characters
HTMLDecode(str1);
result := Trim(str1);
end;
//------------------------------------------------------------------------------
// returns a string with blanks and crlf compacted (and more...)
// if flag = 'spec1' : remove special characters (spec1)
// and/or flag = 'spec2' : replace special characters (spec2) with blank
//------------------------------------------------------------------------------
function CompactString(str1, flag: String) :string;
var
s1, m, str2 : string;
i: integer;
begin
if flag <> '' then // replace & ... stuffs
str1 := TranslateSpecial(str1);
m := '';
str2 := '';
i := 0;
while (i < Length(str1)) do
begin
i := i + 1;
s1 := Copy(str1, i, 1); // current character of str1
if s1 = #09 then s1 := ' '; // replace Tab ('09'x) with blank
if (s1 <> ' ') and (flag <> '') then // treat special characters
begin
if Pos('spec1', flag) > 0 then // remove spec1
if Pos(s1, spec1) > 0 then continue;
if Pos('spec2', flag) > 0 then // replace spec2
if Pos(s1, spec2) > 0 then s1 := ' ';
end;
if (s1 = ' ') and (s1 = m) then continue; // ignore multiple blanks
if s1 = #13 then // cr: maybe crlf
begin
s1 := Copy(str1, i, 2);
i := i +1;
if (s1 = crlf) and (s1 = m) then continue; // ignore multiple crlf
end;
m := s1; // memo current character(s)
str2 := str2+s1; // and store
end; {while i < length(str1)}
result := Trim(str2);
end;
//------------------------------------------------------------------------------
// returns a string formatted according to the following convention
// final_text := TranslateText(initial_text, format_type);
// format_type (integer)
// 0 : no change
// 1 : change all characters to lowercase
// 2 : change all characters to uppercase
// 3 : first character to uppercase, the others to lowercase
// 4 : all first characters of words to uppercase, the others to lowercase
//------------------------------------------------------------------------------
function TranslateText(str1: string; f: integer) :string;
begin
case f of
1: result := AnsiLowerCase(str1);
2: result := AnsiUpperCase(str1);
3: result := AnsiUpFirstLetter(AnsiLowerCase(str1));
4: result := AnsiMixedCase(AnsiLowerCase(str1), ' ');
else result := str1;
end;
end;
//------------------------------------------------------------------------------
// dump a string to disk
// DumpPage(path_of_the_file,string)
// path_of_the_file = complete path (ie: 'c:\temp\myfile.txt')
// note: the directory (if any) must be created before
//------------------------------------------------------------------------------
procedure DumpPage(filePath, WholeText: string);
var
page: TStringList;
begin
page := TStringList.Create;
page.Text := WholeText;
page.SaveToFile(filePath);
page.Free;
end;
//------------------------------------------------------------------------------
// create and display a list (of movies or what you want)
// and returns the selected address or ''
// addr := SelectMovie('title_for_display');
// note: global TStringList's must be initialized
// memoAdr. = url of page (or what you want)
// memo.Text. = text to display (you can separate tokens with sepchar1)
//------------------------------------------------------------------------------
function SelectMovie(title: string) :string;
var
Address: String;
i: integer;
begin
PickTreeClear; // clear list
PickTreeAdd(title, '');
for i:= 0 to memoTxt.Count -1 do // create the list
PickTreeAdd(StringReplace(memoTxt.GetString(i), sepchar1, ''), memoAdr.GetString(i));
result := '';
if PickTreeExec(Address) then result := Address;
end;
// more or less specific
//------------------------------------------------------------------------------
// returns the url contained in a string without edition
// addr := GetUrl(string_containing_url, start_from_or_'',base_url_or_'');
//------------------------------------------------------------------------------
function GetUrl(WholeText, StartFrom, urlb: string) :string;
var
i: Integer;
delim: char;
begin
result := '';
if StartFrom <> '' then // if StartFrom = '', start from begining of string
begin
i := Pos(StartFrom, WholeText);
if i = 0 then exit; // StartFrom not found
Delete(WholeText,1, i -1); // delete characters before StartFrom
end;
i := Pos('HREF=', AnsiUpperCase(WholeText)); // start of url: href=
if i = 0 then exit; // no href= found
Delete(WholeText,1, i +4); // skip href=
WholeText := TextBefore(WholeText, '>', ''); // stop at the end of tag
delim := StrGet(WholeText, 1); // delimiter = " or ' or nothing special
if (delim = '''') or (delim = '"') then // skip ' or "
Delete(WholeText, 1, 1)
else
delim := ' '; // no delimiter: stop at first blank if any
i := Pos(delim, WholeText);
if i > 0 then Delete(WholeText,i, Length(WholeText));
WholeText := StringReplace(WholeText, '&', '&');
WholeText := StringReplace(WholeText, '../', ''); // cf relative address
WholeText := StringReplace(WholeText, './', '');
WholeText := urlb + WholeText; // add base url if any
result := Trim(WholeText);
end;
//------------------------------------------------------------------------------
// returns a string formatted for display - special stuffs
// see comments in FormatText2
//------------------------------------------------------------------------------
function FormatText(initialText: string) :string;
begin
// paragraphs (HTML tags) = crlf (that's my choice, isn't it?)
initialText := StringReplace(initialText, '</p>', crlf);
initialText := StringReplace(initialText, '<p>', crlf);
// now "standard" formatting
result := FormatText2(initialText);
end;
//------------------------------------------------------------------------------
// returns a string formatted for display - this text may contain html tags
// and special characters( & < > " )
// formatted_text := FormatText(initial_text);
// if your text is coded using UTF-8 then you must code in the caller script:
// FormatUTF8 := 1;
// if all your texts are in pure ASCII, then you have nothing to do ...
// or if you have a mix, then you must set FormatUTF8 accordingly (0 or 1)
//------------------------------------------------------------------------------
function FormatText2(initialText: string) :string;
var
s: char;
i: integer;
begin
result := '';
if initialText = '' then exit; // nothing to convert
if FormatUTF8 = 1 then
begin
initialText := UTF8Decode(initialText); // UTF-8 to ASCII
// some strange characters not translated....
initialText := StringReplace(initialText, #160, ' '); // 'A0'x
end;
// suppress HTML tags and translate special characters (& ...)
HTMLRemoveTags(initialText);
initialText := TranslateSpecial(initialText);
// suppress formatting characters at the begining and at the end of string
// (except sepchar1 and sepchar2)
while (initialText <> '') do
begin
s := StrGet(initialText, 1); // 1st character of initialText
if (s = #0) or (s > #32) or (s = sepchar1) or (s = sepchar2) then break; // ended
Delete(initialText, 1, 1); // out
end;
//
while (initialText <> '') do
begin
i := Length(initialText);
s := StrGet(initialText, i); // last character of initialText
if (s = #0) or (s > #32) or (s = sepchar1) or (s = sepchar2) then break; // ended
Delete(initialText, i, 1); // out
end;
// and compact string (leaving spec1 and spec2 asis)
result := CompactString(initialText, '');
end;
//------------------------------------------------------------------------------
// returns the movie name formatted (for input to search engines)
// note that this is not very universal, but maybe can fit your needs...
//------------------------------------------------------------------------------
function FormatMovieName(str: string) :string;
var
i: integer;
begin
// sometimes, movie names are coded as 'usual title/alternate title'
// keep only the usual name
i := Pos('/', str);
if i > 0 then str := Left(str, i-1);
str := AnsiLowerCaseNoAccents(str); // lower case without accents
// some search engines limit the number of key words used
// so it's better to remove the 1st article (if more than 3 words)
if Words(str) > 3 then
str := RemoveArticles(str);
// compact string and remove spec1 (don't treat spec2 here)
result := CompactString(str, 'spec1');
end;
//------------------------------------------------------------------------------
// returns a string with all special characters suppressed (for comparisons)
//------------------------------------------------------------------------------
function CleanString(str1: string) :string;
begin;
str1 := AnsiLowerCaseNoAccents(str1); // lowercase without accents
result := CompactString(str1, 'spec1 spec2'); // compact string (treat spec1 and spec2)
end;
end.